home *** CD-ROM | disk | FTP | other *** search
- /*
- * $Id: a2html.c,v 1.8 1999/08/28 19:20:35 tommya Exp $
- *
- * Ascii to html
- *
- * Search "show_help" for usage.
- * Tab not used.
- *
- * THIS PROGRAM IS PUBLIC DOMAIN (NO COPYRIGHT)
- *
- * See "changes" for the list of changes.
- *
- * Created by Tommy Andersen 1997,98,99
- * Compiler: Lattice C (Atari) + GCC (Atari or Linux)
- */
-
-
#ifdef ATARI
#include <mintbind.h>
#include <osbind.h>
#endif
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>
#include <unistd.h>
-
/* Platform tag that becomes part of the version string */
#if defined(ATARI)
#  define MC " Atari"
#else
#  define MC " Linux"
#endif

/* C library tag; left empty unless __MINT__ is defined */
#if defined(__MINT__)
#  define LV " Mintlib"
#else
#  define LV
#endif

/* Version number followed by the platform and library tags */
#define VERSION         "0.81" MC LV

#define WORD_LENGTH     1000     /* max characters collected into one word */
#define HTML_EXTENSION  ".html"  /* extension given to generated output names */
#define RC_LINE_BUFFER  1000     /* size used when reading one rc file line */
#define MAX_VAR_LENGTH  50       /* longest variable name kept from an rc line */

/* Control the <br> tags a2html invents */
#define DISALLOW_BR     0
#define ALLOW_BR        1
#define FORCE_BR        2

/* Lattice build without __LATTICE__ (not using mintlib?): map the name */
#if defined(LATTICE) && !defined(__LATTICE__)
#  define strncasecmp strnicmp
#endif
-
- const unsigned char header_doctype[] =
- "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 3.2 Final//EN\">\n";
-
- const unsigned char header_tags1[] =
- "<html>\n" \
- "<head>\n" \
- "<title>";
-
- const unsigned char header_tags2[] =
- "</title>\n" \
- "</head>\n" \
- "<!-- Created with a2html "VERSION" -->\n";
-
- const unsigned char pre_start_tag[] = "<pre>\n";
- const unsigned char pre_end_tag[] = "</pre>\n";
-
- const unsigned char bottom_tags[] =
- "</body>\n" \
- "</html>\n";
-
/* URL scheme names; the list is terminated by NULL */
const unsigned char * const url_types[] =
{
  "ftp",    "http",   "file",   "news",
  "wais",   "nntp",   "imap",   "https",
  "telnet", "mailto", "gopher", "prospero",
  NULL
};

/* If the input file is an email, these header lines are displayed */
const unsigned char * const show_emh_field[] =
{
  "date", "from", "reply-to", "to", "cc", "subject",
  NULL
};

/* Values read as "on" and as "off" when a setting is parsed */
char *yes_strings[] =
{
  "1", "yes", "true", "sure",
  NULL
};

char *no_strings[] =
{
  "0", "no", "false", "noway",
  NULL
};
-
- /*
- * Atari charset to iso8859-1 charset
- * Just a simple lookup array
- * The Atari char is used as an index, the lookup value is the iso8859-1 char.
- *
- * There is one entry for every byte value (0-255).
- * Codes 0-31 and 127 give a space, except LF (10) and CR (13) which are
- * passed on unchanged. Codes 32-126 are returned as they are.
- * Codes 128-255 that have no replacement listed below also give a space.
- */
-
- const unsigned char convertAtariToIso88591[]=
- { /* 0-9 */ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',
- /* 10-19 */ 10, ' ', ' ', 13, ' ', ' ', ' ', ' ', ' ', ' ',
- /* 20-29 */ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',
- /* 30-39 */ ' ', ' ', ' ', '!', 34, '#', '$', '%', '&', 39,
- /* 40-49 */ '(', ')', '*', '+', ',', '-', '.', '/', '0', '1',
- /* 50-59 */ '2', '3', '4', '5', '6', '7', '8', '9', ':', ';',
- /* 60-69 */ '<', '=', '>', '?', '@', 'A', 'B', 'C', 'D', 'E',
- /* 70-79 */ 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O',
- /* 80-89 */ 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y',
- /* 90-99 */ 'Z', '[', 92 , ']', '^', '_', '`', 'a', 'b', 'c',
- /*100-109*/ 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
- /*110-119*/ 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w',
- /*120-129*/ 'x', 'y', 'z', '{', '|', '}', '~', ' ', 199, 252,
- /*130-139*/ 233, 226, 228, 224, 229, 231, 234, 235, 232, 239,
- /*140-149*/ 238, 236, 196, 197, 201, 230, 198, 244, 246, 242,
- /*150-159*/ 251, 249, 255, 214, 220, 162, 163, 165, 223, ' ',
- /*160-169*/ 225, 237, 243, 250, 241, 209, ' ', ' ', 191, ' ',
- /*170-179*/ 172, 189, 188, 161, 171, 187, 227, 245, 216, 248,
- /*180-189*/ 230, 198, 192, 195, 213, 168, 180, ' ', 182, 169,
- /*190-199*/ 174, ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',
- /*200-209*/ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',
- /*210-219*/ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',
- /*220-229*/ ' ', 167, ' ', ' ', ' ', 223, ' ', ' ', ' ', ' ',
- /*230-239*/ 181, ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',
- /*240-249*/ ' ', 177, ' ', ' ', ' ', ' ', 247, ' ', 176, ' ',
- /*250-255*/ 183, ' ', ' ', 178, 179, 175
- };
-
/*
 * 8 bit iso-8859-1 to 7 bit html
 * Lookup string table: the iso-8859-1 char is the index, the entry is the
 * text written to the html file for that char.
 *
 * - " & < > are written as &quot; &amp; &lt; &gt; so they are not taken
 *   for markup.
 * - 160-255 are written as character entity references, which keeps the
 *   output pure 7 bit.
 * - Control codes and the unused range 128-159 give a space, LF stays a
 *   newline and CR (13) is NULL, i.e. nothing is written for it.
 */

const unsigned char * const convertIso88591ToHtml[]=
{ /*  0-9  */ " ", " ", " ", " ", " ", " ", " ", " ", " ", " ",
  /* 10-19 */ "\n", " ", " ", NULL, " ", " ", " ", " ", " ", " ",
  /* 20-29 */ " ", " ", " ", " ", " ", " ", " ", " ", " ", " ",
  /* 30-39 */ " ", " ", " ", "!", "&quot;", "#", "$", "%", "&amp;", "'",
  /* 40-49 */ "(", ")", "*", "+", ",", "-", ".", "/", "0", "1",
  /* 50-59 */ "2", "3", "4", "5", "6", "7", "8", "9", ":", ";",
  /* 60-69 */ "&lt;", "=", "&gt;", "?", "@", "A", "B", "C", "D", "E",
  /* 70-79 */ "F", "G", "H", "I", "J", "K", "L", "M", "N", "O",
  /* 80-89 */ "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y",
  /* 90-99 */ "Z", "[", "\\", "]", "^", "_", "`", "a", "b", "c",
  /*100-109*/ "d", "e", "f", "g", "h", "i", "j", "k", "l", "m",
  /*110-119*/ "n", "o", "p", "q", "r", "s", "t", "u", "v", "w",
  /*120-129*/ "x", "y", "z", "{", "|", "}", "~", " ", " ", " ", /* 128-159 not used */
  /*130-139*/ " ", " ", " ", " ", " ", " ", " ", " ", " ", " ",
  /*140-149*/ " ", " ", " ", " ", " ", " ", " ", " ", " ", " ",
  /*150-159*/ " ", " ", " ", " ", " ", " ", " ", " ", " ", " ",
  /*160-169*/ "&nbsp;", "&iexcl;", "&cent;", "&pound;", "&curren;", "&yen;", "&brvbar;", "&sect;", "&uml;", "&copy;",
  /*170-179*/ "&ordf;", "&laquo;", "&not;", "&shy;", "&reg;", "&macr;", "&deg;", "&plusmn;", "&sup2;", "&sup3;",
  /*180-189*/ "&acute;", "&micro;", "&para;", "&middot;", "&cedil;", "&sup1;", "&ordm;", "&raquo;", "&frac14;", "&frac12;",
  /*190-199*/ "&frac34;", "&iquest;", "&Agrave;", "&Aacute;", "&Acirc;", "&Atilde;", "&Auml;", "&Aring;", "&AElig;", "&Ccedil;",
  /*200-209*/ "&Egrave;", "&Eacute;", "&Ecirc;", "&Euml;", "&Igrave;", "&Iacute;", "&Icirc;", "&Iuml;", "&ETH;", "&Ntilde;",
  /*210-219*/ "&Ograve;", "&Oacute;", "&Ocirc;", "&Otilde;", "&Ouml;", "&times;", "&Oslash;", "&Ugrave;", "&Uacute;", "&Ucirc;",
  /*220-229*/ "&Uuml;", "&Yacute;", "&THORN;", "&szlig;", "&agrave;", "&aacute;", "&acirc;", "&atilde;", "&auml;", "&aring;",
  /*230-239*/ "&aelig;", "&ccedil;", "&egrave;", "&eacute;", "&ecirc;", "&euml;", "&igrave;", "&iacute;", "&icirc;", "&iuml;",
  /*240-249*/ "&eth;", "&ntilde;", "&ograve;", "&oacute;", "&ocirc;", "&otilde;", "&ouml;", "&divide;", "&oslash;", "&ugrave;",
  /*250-255*/ "&uacute;", "&ucirc;", "&uuml;", "&yacute;", "&thorn;", "&yuml;"
};
-
/* One conversion job: what to read, where to write and which title to use */
struct file_entry
{
  char *input_filename;          /* file to convert */
  char *output_filename;         /* where to put the output */
  unsigned char *title_html;     /* title text, may be missing */
  struct file_entry *next;       /* following job in the list */
};

/* Head of the job list */
struct file_entry *work_list = NULL;

/* One saved line of text, kept in a singly linked list */
struct emh_text_entry
{
  struct emh_text_entry *next;   /* following line in the list */
  unsigned char *text;           /* the saved text */
};
-
-
-
/*
 * Prototypes
 */

/* program entry and file list handling */
int main (int argc, char *argv[]);
struct file_entry *create_entry (void);
void free_list (struct file_entry *start);
void show_help (int all);
void output_error_text (char *string);

/* conversion of one file */
int make_html_file (char *input_filename, char *output_filename, const unsigned char *title);
void make_output_filename (const char *in_filename, char *out_filename, const char *new_ext);
char *find_filename (char *string);

/* output helpers; the first argument tells if a <br> may be generated */
void output_char (const int allow_br, int c, FILE *handle);
void output_string (const int allow_br, const char *s, FILE *handle);
void output_parse_char (const int allow_br, unsigned int c, FILE *handle);
void output_parse_string (const int allow_br, const char *s, FILE *handle);
void output_word (const int, const unsigned char *word, FILE *handle);

/* rc file */
void read_rc_file (char *rc_name);
void set_var (char *variable, char *value);
FILE *fopen_rc_file (const char *rc_name, const char *mode);
-
/*
 * Some default settings
 * They can be changed from the rc file (set_var) and from the command line.
 */

#define ATARICHARSET 0
#define ISOCHARSET 1
#define TRUE 1
#define FALSE 0
#define NA (-1) /* neither TRUE nor FALSE; parenthesized so it expands safely */

#ifdef ATARI
int method = ATARICHARSET; /* input files uses the Atari charset */
#else
int method = ISOCHARSET; /* input files uses the ISO-8859-1 charset */
#endif
int quiet = FALSE; /* print errors to stderr */
int pause_required = FALSE; /* don't wait for a keypress */
int crlf = FALSE; /* use un*x lines */
int notags = FALSE; /* use tags in output */
int nopre = FALSE; /* use <pre> tags */
int nolinks = FALSE; /* check for links */
int nomail = FALSE; /* check for emailadress */
int nobr = FALSE; /* use <br> if <nopre> is true */
int doctype = TRUE; /* output html doctype */
int emailsource = FALSE; /* Input file is a normal ascii file */
unsigned char *footer_html = NULL; /* footer text - raw html code */
unsigned char *separator_html = NULL; /* Emailer separator - raw html code */
char *bodybgcolor = NULL; /* <body> bgcolor value, NULL = "White" */
char *bodytext = NULL; /* <body> text value, NULL = "Black" */
char *bodylink = NULL; /* <body> link value, NULL = "Blue" */
-
-
-
- #define ARGUMENT_IS(x) (!strcmp ((x), argv[a]))
- #undef max
- #define max(x,y) ((x) > (y) ? (x) : (y))
-
- int
- main (int argc, char *argv[])
- {
- int a = 1;
- int ret = 0;
- int save_argc = argc;
-
- struct file_entry *current, *prev;
-
- #ifdef ATARI
- #if defined(LATTICE) && !defined(__LATTICE__)
- Pdomain (1); /* mint domain, gcc (MiNT) doesn't need it */
- fmode (stdout,1); /* binary output -- no translation VERY IMPORTANT! */
- #else
- stdout->_flag |= _IOBIN; /* force binary output (mintlib) */
- #endif
- #endif
-
- read_rc_file ("a2htmlrc");
-
- current = work_list = create_entry ();
-
- if (!current)
- exit (1); /* low memory! Shouldn't be possible */
-
-
- /* parse command line and build a list of files */
- while (--argc > 0)
- {
- if (ARGUMENT_IS ("-iso") || ARGUMENT_IS("-iso-8859-1"))
- method = ISOCHARSET;
- else if (ARGUMENT_IS ("-atari"))
- method = ATARICHARSET;
- else if (ARGUMENT_IS ("-q") || ARGUMENT_IS ("-quiet"))
- quiet = TRUE;
- else if (ARGUMENT_IS ("-p") || ARGUMENT_IS ("-pause"))
- pause_required = TRUE;
- else if (ARGUMENT_IS ("-crlf"))
- crlf = TRUE;
- else if (ARGUMENT_IS ("-notags"))
- notags = TRUE;
- else if (ARGUMENT_IS ("-nolinks"))
- nolinks = TRUE;
- else if (ARGUMENT_IS ("-nomail"))
- nomail = TRUE;
- else if (ARGUMENT_IS ("-nopre"))
- nopre = TRUE;
- else if (ARGUMENT_IS ("-nobr"))
- nobr = TRUE;
- else if (ARGUMENT_IS ("-nodoctype"))
- doctype = FALSE;
- else if (ARGUMENT_IS ("-emailsource"))
- {
- emailsource = TRUE;
- method = ISOCHARSET; /* forced */
- }
- else if (ARGUMENT_IS ("--help"))
- {
- show_help (TRUE);
- if (pause_required)
- {
- output_error_text ("\nPress Return\n");
- getchar ();
- }
- exit (1);
- }
- else if (ARGUMENT_IS ("--version"))
- {
- show_help (FALSE);
- if (pause_required)
- {
- output_error_text ("\nPress Return\n");
- getchar ();
- }
- exit (1);
- }
- else if (ARGUMENT_IS ("-dumpargs"))
- {
- int a=0;
- char *some_text = (char*) malloc (2000);
-
- while (save_argc-- && some_text)
- {
- sprintf (some_text, "Arg no. %d is (%s)\n", a, argv[a] ? argv[a] : "nothing");
- output_error_text (some_text);
- a++;
- }
- if (some_text)
- free (some_text);
- }
- else if (ARGUMENT_IS ("-footer")) /* footer text specified */
- {
- a++;
- if (--argc > 0) /* must have an extra argument */
- {
- if (footer_html)
- free (footer_html);
- footer_html = (char*) strdup (argv[a]);
- }
- }
- else if (ARGUMENT_IS ("-separator")) /* separator text specified */
- {
- a++;
- if (--argc > 0) /* must have an extra argument */
- {
- if (separator_html)
- free (separator_html);
- separator_html = (char*) strdup (argv[a]);
- }
- }
- else if (ARGUMENT_IS ("-bodybgcolor")) /* body background color specified */
- {
- a++;
- if (--argc > 0)
- {
- if (bodybgcolor)
- free (bodybgcolor);
- bodybgcolor = (char*) strdup (argv[a]);
- }
- }
- else if (ARGUMENT_IS ("-bodytext")) /* body text color specified */
- {
- a++;
- if (--argc > 0)
- {
- if (bodytext)
- free (bodytext);
- bodytext = (char*) strdup (argv[a]);
- }
- }
- else if (ARGUMENT_IS ("-bodylink")) /* body link color specified */
- {
- a++;
- if (--argc > 0)
- {
- if (bodylink)
- free (bodylink);
- bodylink = (char*) strdup (argv[a]);
- }
- }
-
- else if (ARGUMENT_IS ("-") || ARGUMENT_IS ("-stdout")) /* stdout output */
- {
- if (current && current->output_filename)
- {
- free (current->output_filename);
- current->output_filename = NULL;
- }
- }
- else if (ARGUMENT_IS ("-o") || ARGUMENT_IS ("-out")) /* output filename specified */
- {
- a++;
- if (--argc > 0) /* must have an extra argument */
- {
- if (current)
- {
- if (current->output_filename)
- free (current->output_filename);
- current->output_filename = (char*) strdup (argv[a]);
- }
- }
- }
- else if (ARGUMENT_IS ("-title")) /* new title specified */
- {
- a++;
- if (--argc > 0) /* must have an extra argument */
- {
- if (current)
- {
- if (current->title_html)
- free (current->title_html);
- current->title_html = (unsigned char*) strdup (argv[a]);
- }
- }
- }
- else
- {
- /* Well, only files (and illegal switches) comes here */
- if (current && current->input_filename)
- {
- /* this entry was allready used, so make a new one */
- prev = current;
- current = create_entry ();
- prev->next = current;
- }
- if (current)
- {
- current->input_filename = (char*) strdup (argv[a]); /* copy that name */
- /* make some space for the output filename */
- current->output_filename = (char*) calloc (strlen (current->input_filename) + sizeof (HTML_EXTENSION) + 1, sizeof (char));
- /* calculate the outputname */
- make_output_filename (current->input_filename, current->output_filename, HTML_EXTENSION);
- }
- }
- a++;
- } /* while */
-
- #ifdef DEBUG
- current = work_list;
-
- while (current)
- {
- fprintf (stderr, "Debug: Filelist = %s %s\n",
- (current->input_filename) ? current->input_filename : "null",
- (current->output_filename) ? current->output_filename : "null" );
- current = current->next;
- }
- #endif
-
- /* run through the list and create the files */
-
- current = work_list;
-
- while (current)
- {
- ret += make_html_file (current->input_filename, current->output_filename, current->title_html);
- current = current->next;
- }
- free_list (work_list);
-
- if (footer_html)
- free (footer_html);
- if (separator_html)
- free (separator_html);
- if (bodybgcolor)
- free (bodybgcolor);
- if (bodytext)
- free (bodytext);
- if (bodylink)
- free (bodylink);
-
- if (pause_required)
- {
- output_error_text ("\nPress Return\n");
- getchar ();
- }
- return (ret > 0) ? 1 : 0;
- }
-
- /*
- * Parse it and set variables
- */
-
-
- void
- read_rc_file (char *rc_name)
- {
- FILE *fp_rc = fopen_rc_file (rc_name, "r");
-
- if (fp_rc)
- {
- char *buffer = (char*) malloc (RC_LINE_BUFFER+1);
- char *var = (char*) malloc (MAX_VAR_LENGTH+1);
- char *val = (char*) malloc (RC_LINE_BUFFER+1);
-
- if (buffer && var && val)
- {
- /* a rc line looks like "variable = value\n" */
- while (fgets (buffer, RC_LINE_BUFFER, fp_rc))
- {
- if (buffer[0] != '#')
- {
- char *p; /* pointer to work with */
- char *p_equal = NULL; /* will later pointer at = */
- /* remove trailing spaces and nasty codes from end */
- p = buffer + strlen (buffer) - 1;
- while ((p >= buffer) && (*p <= ' '))
- *p-- = '\0';
- if (buffer[0]) /* still something left? */
- {
- if ((p_equal = strchr (buffer, '=')) > buffer)
- {
- /* find variable name */
- *p_equal = '\0'; /* split buffer */
- p = buffer + strlen (buffer) - 1; /* remove spaces after variable */
- while ((p >= buffer) && (*p <= ' '))
- *p-- = '\0';
- p = buffer; /* remove spaces before variable */
- while (*p && (*p <= ' '))
- p++;
- strncpy (var, p, MAX_VAR_LENGTH);
- var[MAX_VAR_LENGTH] = '\0'; /* allocated +1, so it's okay */
-
- /* get the value */
- p = p_equal + 1;
- while (*p && (*p <= ' ')) /* skip spaces before value */
- p++;
- strcpy (val, p);
-
- if (var[0]) /* allow empty values, but variable must be defined */
- set_var (var, val);
- }
-
- }
- }
- }
- }
- if (buffer)
- free (buffer);
- if (var)
- free (var);
- if (val)
- free (val);
- fclose (fp_rc);
- }
- }
-
-
-
- void
- set_var (char *variable, char *value)
- {
- register int bool_val;
-
- if (variable && value)
- {
- bool_val = NA;
- if (value[0])
- {
- char **p = yes_strings;
- while (*p)
- { if (!strcmp (value, *p))
- bool_val = TRUE;
- p++;
- }
- p = no_strings;
- while (*p)
- { if (!strcmp (value, *p))
- bool_val = FALSE;
- p++;
- }
- }
- /* some trivial tests going on here... */
- if (!strcmp ("iso", variable) || !strcmp ("iso-8859-1", variable))
- {
- if (bool_val == TRUE)
- method = ISOCHARSET;
- else if (bool_val == FALSE)
- method = ATARICHARSET;
- }
- if (!strcmp ("atari", variable))
- {
- if (bool_val == TRUE)
- method = ATARICHARSET;
- else if (bool_val == FALSE)
- method = ISOCHARSET;
- }
- if (!strcmp ("quiet", variable))
- {
- if (bool_val == TRUE)
- quiet = TRUE;
- else if (bool_val == FALSE)
- quiet = FALSE;
- }
- if (!strcmp ("pause", variable))
- {
- if (bool_val == TRUE)
- pause_required = TRUE;
- else if (bool_val == FALSE)
- pause_required = FALSE;
- }
- if (!strcmp ("crlf", variable))
- {
- if (bool_val == TRUE)
- crlf = TRUE;
- else if (bool_val == FALSE)
- crlf = FALSE;
- }
- if (!strcmp ("notags", variable))
- {
- if (bool_val == TRUE)
- notags = TRUE;
- else if (bool_val == FALSE)
- notags = FALSE;
- }
- if (!strcmp ("nolinks", variable))
- {
- if (bool_val == TRUE)
- nolinks = TRUE;
- else if (bool_val == FALSE)
- nolinks = FALSE;
- }
- if (!strcmp ("nomail", variable))
- {
- if (bool_val == TRUE)
- nomail = TRUE;
- else if (bool_val == FALSE)
- nomail = FALSE;
- }
- if (!strcmp ("nopre", variable))
- {
- if (bool_val == TRUE)
- nopre = TRUE;
- else if (bool_val == FALSE)
- nopre = FALSE;
- }
- if (!strcmp ("nobr", variable))
- {
- if (bool_val == TRUE)
- nobr = TRUE;
- else if (bool_val == FALSE)
- nobr = FALSE;
- }
- if (!strcmp ("nodoctype", variable))
- {
- if (bool_val == TRUE)
- doctype = FALSE; /* pitfall */
- else if (bool_val == FALSE)
- doctype = TRUE;
- }
- if (!strcmp ("emailsource", variable))
- {
- if (bool_val == TRUE)
- {
- emailsource = TRUE;
- method = ISOCHARSET;
- }
- else if (bool_val == FALSE)
- emailsource = FALSE;
- }
- if (!strcmp ("footer", variable))
- {
- if (footer_html)
- free (footer_html);
- footer_html = NULL;
- if (value[0])
- footer_html = (char*) strdup (value);
- }
- if (!strcmp ("separator", variable))
- {
- if (separator_html)
- free (separator_html);
- separator_html = NULL;
- if (value[0])
- separator_html = (char*) strdup (value);
- }
- if (!strcmp ("bodybgcolor", variable))
- {
- if (bodybgcolor)
- free (bodybgcolor);
- bodybgcolor = NULL;
- if (value[0])
- bodybgcolor = (char*) strdup (value);
- }
- if (!strcmp ("bodytext", variable))
- {
- if (bodytext)
- free (bodytext);
- bodytext = NULL;
- if (value[0])
- bodytext = (char*) strdup (value);
- }
- if (!strcmp ("bodylink", variable))
- {
- if (bodylink)
- free (bodylink);
- bodylink = NULL;
- if (value[0])
- bodylink = (char*) strdup (value);
- }
- }
- }
-
-
/*
 * Try to open <dir><slash><rc_name> and, if that fails,
 * <dir><slash>.<rc_name>. path is work space supplied by the caller; it
 * must have room for the longer of the two names.
 */
static FILE *
fopen_rc_in_dir (char *path, const char *dir, const char *slash,
                 const char *rc_name, const char *mode)
{
  FILE *fp;

  strcpy (path, dir);
  strcat (path, slash);
  strcat (path, rc_name);
  fp = fopen (path, mode);
  if (!fp)
    {
      strcpy (path, dir);
      strcat (path, slash);
      strcat (path, ".");
      strcat (path, rc_name);
      fp = fopen (path, mode);
    }
  return fp;
}

/*
 * Search for a2htmlrc file in
 * 1) current working directory
 * 2) HOME (env variable)
 * Looks first after a2htmlrc, then .a2htmlrc
 * Opens the file and returns filepointer
 *
 * Returns NULL if no file could be opened, if rc_name or mode is NULL,
 * or if the memory for the path could not be allocated.
 */

FILE *
fopen_rc_file (const char *rc_name, const char *mode)
{
  char *cwd_path = getcwd (NULL, 0); /* string is auto. allocated */
  char *home_path = getenv ("HOME"); /* string not allocated (just a borrow pointer) */
  const char *slash_cwd = "/";
  const char *slash_home = "/";
  char *a2htmlrc_file = NULL;
  FILE *fp_rc = NULL; /* must start as NULL: it is tested and returned on every path */
  size_t dir_len = 0;

  if (rc_name && mode && (cwd_path || home_path))
    {
      if (cwd_path)
        dir_len = strlen (cwd_path);
      if (home_path && strlen (home_path) > dir_len) /* Well, just in case it doesn't exist! */
        dir_len = strlen (home_path);
      /* longest directory + slash + "." + name + '\0' */
      a2htmlrc_file = (char *) malloc (dir_len + strlen (rc_name) + 3);
    }

  if (a2htmlrc_file)
    {
      /* first look for a2htmlrc in current working directory */
      if (cwd_path)
        {
          /* determine slash type */
#if defined(ATARI) && !defined(__MINT__)
          /* Using \ in path ?? Not executed in MiNTLib or Linux - could be a BAD THING (TM)*/
          if ((cwd_path[0] && cwd_path[1] == ':') || strchr (cwd_path, '\\'))
            slash_cwd = "\\";
#endif
          fp_rc = fopen_rc_in_dir (a2htmlrc_file, cwd_path, slash_cwd, rc_name, mode);
        }
      /* then look for it in home directory */
      if (!fp_rc && home_path)
        {
          /* Hopefully nobody uses \ in fs with / as slash... */
          /* home_path[0] is tested first, so an empty HOME is not read past its end */
          if ((home_path[0] && home_path[1] == ':') || strchr (home_path, '\\'))
            slash_home = "\\";
          fp_rc = fopen_rc_in_dir (a2htmlrc_file, home_path, slash_home, rc_name, mode);
        }
    }

  free (a2htmlrc_file);
  free (cwd_path);
  return fp_rc;
}
-
-
- struct file_entry
- *create_entry (void)
- {
- return (struct file_entry*) calloc (1, sizeof (struct file_entry));
- }
-
-
- void
- free_list (struct file_entry* start)
- {
- struct file_entry* next;
- struct file_entry* current = start;
-
- while (current)
- {
- next = current->next;
- if (current->input_filename)
- free (current->input_filename);
- if (current->output_filename)
- free (current->output_filename);
- if (current->title_html)
- free (current->title_html);
- free (current);
- current = next;
- }
- }
-
-
/*
 * make ex test.txt into test.html, if new_ext is .html
 * special case: if out_filename is present, no filtype substitution is made.
 *
 * in_filename   name of the input file
 * out_filename  buffer for the result. It is only written if it holds an
 *               empty string. It must have room for in_filename followed
 *               by new_ext.
 * new_ext       the new extension, with the dot
 *
 * Only a dot in the last part of the path (after the last / or \) starts
 * an extension. A dot that is the first character of that part does not,
 * so ".profile" becomes ".profile.html" and not ".html".
 * Nothing is done if an argument is NULL or in_filename is empty.
 */

void
make_output_filename (const char *in_filename, char *out_filename, const char *new_ext)
{
  char *name; /* first character after the last / or \ */
  char *sep;
  char *dot;

  if (!in_filename || !out_filename || !new_ext || !*in_filename)
    return;

  if (*out_filename) /* output name already given, keep it */
    return;

  /* use same name as inputfile */
  strcpy (out_filename, in_filename);

  name = out_filename;
  sep = strrchr (out_filename, '/');
  if (sep && sep + 1 > name)
    name = sep + 1;
  sep = strrchr (out_filename, '\\');
  if (sep && sep + 1 > name)
    name = sep + 1;

  /* replace extension with the new one if the name has a dot in it */
  dot = strrchr (name, '.');
  if (dot && dot != name)
    strcpy (dot, new_ext);
  else
    strcat (out_filename, new_ext); /* no ext, let's give it the new extension */
}
-
-
- void
- show_help (int all)
- {
-
- #ifdef ATARI
- output_error_text ( \
- "a2html "VERSION ", Compiled "__DATE__".\n" \
- "Converts a textfile into an iso-8859-1 (7bit) htmlfile\n" \
- "Written by Tommy Andersen (tommya@post3.tele.dk). Public Domain\n" \
- );
-
- if (all)
- output_error_text (
- "Usage: a2html [[options] textfile [htmlfile]]\n" \
- " Charset options:\n" \
- " -iso, -iso-8859-1 Textfile uses iso-8859-1 charset.\n" \
- " -atari Textfile uses Atari charset (default).\n" \
- " Speciel options:\n" \
- " -q, -quiet Quiet mode - no errortext.\n" \
- " -p, -pause Wait for a keypress before quit.\n" \
- " --help This help.\n" \
- " --version Display a2html version.\n" \
- " Input options:\n" \
- " <textfile> Read from textfile.\n" \
- " no <textfile> Read input from stdin.\n" \
- " Input type:\n" \
- " -emailsource Textfile is an email.\n" \
- " Output options:\n" \
- " -o <htmlfile>, -out <htmlfile>\n" \
- " Write to htmlfile.\n" \
- " no <htmlfile> Write to textfile, filetype changed to html.\n" \
- " If input is read from stdin, stdout will be used.\n" \
- " -, -stdout Force output to stdout.\n" \
- " Html options:\n" \
- " -crlf Lines with CR + LF.\n" \
- " -nodoctype No doctype in htmlfile.\n" \
- " -title <string> Use string as title in htmlfile.\n" \
- " -bodybgcolor <color>\n" \
- " Change background color.\n" \
- " -bodytext <color> Change text color.\n" \
- " -bodylink <color> Change link color.\n" \
- " -notags No <header><title><body> tags in htmlfile.\n" \
- " -nopre No <pre> tags in output - <br> are generated.\n"\
- " -nobr No <br> if -nopre is used.\n" \
- " -footer <string> Use string as footer.\n" \
- " -separator <string> Use string as separator (-emailsource).\n" \
- " Url options:\n" \
- " -nolinks Don't make href in htmlfile.\n" \
- " -nomail Don't make mailto in htmlfile.\n" \
- );
- #else
- output_error_text ( \
- "a2html "VERSION ", Compiled "__DATE__".\n" \
- "Converts a textfile into an iso-8859-1 (7bit) htmlfile\n" \
- "Written by Tommy Andersen (tommya@post3.tele.dk) Public Domain\n" \
- );
-
- if (all)
- output_error_text (
- "Usage: a2html [[options] textfile [htmlfile]]\n" \
- " Charset options:\n" \
- " -iso, -iso-8859-1 Textfile uses iso-8859-1 charset (default).\n" \
- " -atari Textfile uses Atari charset.\n" \
- " Speciel options:\n" \
- " -q, -quiet Quiet mode - no errortext.\n" \
- " -p, -pause Wait for a keypress before quit.\n" \
- " --help This help.\n" \
- " --version Display a2html version.\n" \
- " Input options:\n" \
- " <textfile> Read from textfile.\n" \
- " no <textfile> Read input from stdin.\n" \
- " Input type:\n" \
- " -emailsource Textfile is an email.\n" \
- " Output options:\n" \
- " -o <htmlfile>, -out <htmlfile>\n" \
- " Write to htmlfile.\n" \
- " no <htmlfile> Write to textfile, filetype changed to html.\n" \
- " If input is read from stdin, stdout will be used.\n" \
- " -, -stdout Force output to stdout.\n" \
- " Html options:\n" \
- " -crlf Lines with CR + LF.\n" \
- " -nodoctype No doctype in htmlfile.\n" \
- " -title <string> Use string as title in htmlfile.\n" \
- " -bodybgcolor <color>\n" \
- " Change background color.\n" \
- " -bodytext <color> Change text color.\n" \
- " -bodylink <color> Change link color.\n" \
- " -notags No <header><title><body> tags in htmlfile.\n" \
- " -nopre No <pre> tags in output - <br> are generated.\n"\
- " -nobr No <br> if -nopre is used.\n" \
- " -footer <string> Use string as footer.\n" \
- " -separator <string> Use string as separator (-emailsource).\n" \
- " Url options:\n" \
- " -nolinks Don't make href in htmlfile.\n" \
- " -nomail Don't make mailto in htmlfile.\n" \
- );
- #endif
- }
-
-
-
- int
- make_html_file (char *input_filename, char *output_filename, const unsigned char *title)
- {
- register int c;
- unsigned char word[WORD_LENGTH+1];
- register FILE *inhandle = NULL;
- register FILE *outhandle = NULL;
-
- /* check for same input file as outputfile */
-
- if (input_filename && output_filename)
- if (!strcmp (input_filename, output_filename))
- {
- output_error_text ("Error: inputfile same as outputfile!\n");
- return 1;
- }
-
- if (input_filename)
- inhandle = fopen (input_filename, "rb");
- else
- inhandle = stdin;
-
-
- if (inhandle) /* don't create files, if inputfile couldn't be opened */
- {
- if (output_filename)
- outhandle = fopen (output_filename, "wb");
- else
- outhandle = stdout;
- }
-
- if (inhandle && outhandle)
- {
- /* Write Html headers */
-
- if (!notags)
- {
- char *some_text = (char*) malloc (1000);
-
- if (doctype)
- output_string (FALSE, header_doctype, outhandle);
-
- output_string (FALSE, header_tags1, outhandle);
-
- if (title)
- output_parse_string (FALSE, title, outhandle);
- else if (input_filename && *input_filename)
- output_parse_string (FALSE, find_filename (input_filename), outhandle);
- else
- output_parse_string (FALSE, "No title", outhandle);
-
- output_string (FALSE, header_tags2, outhandle);
-
- /* make a body */
- if (some_text)
- {
- /* bgcolor */
- sprintf (some_text, "<body bgcolor=\"%s\"", (bodybgcolor) ? bodybgcolor : "White" );
- output_string (FALSE, some_text, outhandle);
-
- sprintf (some_text, " text=\"%s\"", (bodytext) ? bodytext : "Black" );
- output_string (FALSE, some_text, outhandle);
-
- sprintf (some_text, " link=\"%s\"", (bodylink) ? bodylink : "Blue" );
- output_string (FALSE, some_text, outhandle);
-
- output_string (FALSE, ">\n", outhandle);
-
- free (some_text);
- }
- else
- {
- output_string (FALSE, "<body bgcolor=\"White\" text=\"Black\" link=\"Blue\">\n", outhandle);
- }
- }
-
-
- /* Speciel input file */
- /* Emails should never use Atari charset, so we assumes iso-8859-1 is used */
- /* Quoted-printable and mime encoded emails are _not_ decoded */
- /* The complete header is inserted into a comment (outside the <pre> tags ) */
- /* Valid fields are 'saved' and later output'd */
-
- if (emailsource)
- {
- int email_header_mode = TRUE;
- int line_must_out = FALSE;
- unsigned char *emh_line = (unsigned char*) malloc (2000+1); /* many $ mailers breaks the rules */
- unsigned char *p;
- struct emh_text_entry *current, *emh_work_list;
-
- current = emh_work_list = (struct emh_text_entry*) calloc (1, sizeof (struct emh_text_entry));
-
- if (emh_line && emh_work_list)
- {
- output_string (FALSE, "<!-- Complete header follows -->\n", outhandle);
-
- while (!feof (inhandle) && email_header_mode)
- {
- emh_line[0] = '\0';
- if (fgets (emh_line, 2000, inhandle))
- {
- if (emh_line[0] == '\r' || emh_line[0] == '\n')
- {
- email_header_mode = FALSE; /* marker reached */
- line_must_out = FALSE;
- }
- else if (emh_line[0] > ' ')
- {
- int i = 0;
- line_must_out = FALSE;
-
- /* check for a valid field to be saved */
- while (!line_must_out && show_emh_field[i] )
- {
- if (!strncasecmp (emh_line, show_emh_field[i], strlen (show_emh_field[i])))
- {
- line_must_out = TRUE;
- }
- i++;
- }
- }
-
- /* Save the field */
- if (line_must_out && current)
- {
- if (!current->text)
- current->text = (unsigned char*) strdup (emh_line);
-
- if (current->next = (struct emh_text_entry*) calloc (1, sizeof (struct emh_text_entry)))
- current = current->next;
- }
-
- output_string (FALSE, "<!-- ", outhandle);
- p = emh_line; /* remove unwanted characters from comments */
- while (*p)
- {
- if (*p < ' ')
- *p = ' ';
- p++;
- }
- output_string (FALSE, emh_line, outhandle);
- output_string (FALSE, " -->\n", outhandle);
- } /* if fgets */
- } /* while */
- output_string (FALSE, "<!-- End of header -->\n", outhandle);
-
- if (!nopre)
- output_string (FALSE, pre_start_tag, outhandle);
-
- /* output the fields to be displayed */
- current = emh_work_list;
- while (current)
- {
- /* lets parse and output fields */
- if (current->text)
- {
- register unsigned char *p = current->text;
-
- while (*p)
- {
- c = *p++;
- if ( c <= ' ' || c == '<' || c == '>' || c == '\"' || c=='(' || c == ';' || c=='\'' || c==')' || c=='/')
- output_parse_char (TRUE, c, outhandle);
- else
- {
- /* build a word */
- int i = 0;
- word[i++] = (unsigned char) c;
- while (*p && c > ' ' && c!= '<' && c!= '>' && c!='\"')
- {
- c = *p++;
- if ( c > ' ' && c!= '<' && c!= '>' && c != '\"')
- if (i < WORD_LENGTH)
- word[i++] = (unsigned char) c;
- }
- word[i] = '\0';
- output_word (TRUE, word, outhandle);
- output_parse_char (TRUE, c, outhandle);
- }
- }
- }
- current = current->next;
- }
- if (!nopre)
- output_string (FALSE, pre_end_tag, outhandle);
-
- if (separator_html)
- {
- output_string (FALSE, separator_html, outhandle);
- output_string (FALSE, "\n", outhandle);
- }
- else
- if (nopre)
- output_string (FALSE, "<br>\n", outhandle);
-
- /* Cleanup the malloc'ed mess */
- free (emh_line);
- current = emh_work_list;
- while (current)
- {
- register struct emh_text_entry *temp;
-
- if (current->text)
- free (current->text);
- temp = current->next;
- free (current);
- current = temp;
- }
- } /* if */
- }
- /* End of speciel input file */
-
-
- /* And the text (body) */
-
- if (!nopre)
- output_string (FALSE, pre_start_tag, outhandle);
-
- word[0] = '\0';
-
- while (!feof (inhandle))
- {
- c = fgetc (inhandle);
- if ( c >= 0) /* c will be <0 if eof reached */
- {
- if ( c <= ' ' || c == '<' || c == '>' || c == '\"' || c=='(' || c == ';' || c=='\'' || c==')' || c=='/')
- output_parse_char (TRUE, c, outhandle); /* not the beginning of a word, so just output it */
- else
- {
- /* build a word */
- int i = 0;
- word[i++] = (unsigned char) c;
- while (!feof (inhandle) && c > ' ' && c!= '<' && c!= '>' && c!='\"')
- {
- c = fgetc (inhandle);
- if ( c > ' ' && c!= '<' && c!= '>' && c != '\"')
- if (i < WORD_LENGTH)
- word[i++] = (unsigned char) c;
- }
- word[i] = '\0';
- output_word (TRUE, word, outhandle);
- if (!feof (inhandle))
- output_parse_char (TRUE, c, outhandle);
- }
- }
- }
- if (inhandle != stdin) /* clean close */
- fclose (inhandle);
-
- /* Now write the end tags */
- output_string (FALSE, "\n", outhandle); /* ensure that the end tags are starting on a new line */
- if (!nopre)
- output_string (FALSE, pre_end_tag, outhandle);
-
- if (footer_html)
- {
- output_string (FALSE, footer_html, outhandle);
- output_string (FALSE, "\n", outhandle); /* ensures that the next tags are starting on a new line */
- }
-
- if (!notags)
- output_string (FALSE, bottom_tags, outhandle);
-
- if (outhandle != stdout)
- fclose (outhandle); /* well, should check the return value here */
-
- }
- else
- {
- char *sometext = (char*) malloc (1000); /* should be enough */
- if (sometext)
- {
- sprintf (sometext, "Can not open inputfile [%s] or outputfile [%s]\n",
- (input_filename ? input_filename : "stdin"), (output_filename ? output_filename : "stdout"));
- output_error_text (sometext);
- free (sometext);
- }
- return 1;
- }
- return 0;
- }
-
-
- /*
- * Print some errortext to stderr
- * Errortext will no be printed if in quiet mode
- */
- void
- output_error_text (char *string)
- {
- if (!quiet)
- fprintf (stderr, string);
- }
-
-
/*
 * Locate the filename part of a path.
 *
 * string: a path + filename, e.g.  d:\directory\filename.ext
 *
 * Returns a pointer into string at the first character that follows the
 * last '\', ':' or '/'.  If none of these separators occurs, string itself
 * is returned; if the path ends with a separator the result points at the
 * terminating NUL.  A NULL argument gives NULL back.
 */
char *
find_filename (char *string)
{
  char *name;

  if (string == NULL)
    return string;

  /* Walk backwards from the terminator until a separator precedes us */
  for (name = string + strlen (string); name > string; name--)
    {
      const char before = name[-1];

      if (before == '\\' || before == ':' || before == '/')
        break;
    }
  return name;
}
-
-
- /*
- * Outputs a char to the html file
- * No charset parsing
- * If the char is a LF (ascii 10), then it will finish the line.
- */
- void
- output_char (int allow_br, int c, FILE *handle)
- {
- if (c == 10)
- {
- if (nopre && !nobr && allow_br) /* end of line and no pre -> we make the line ends with <br> */
- output_string (FALSE, "<br>", handle); /* note! recursive! Never \n in this one!! */
- if (crlf)
- fputc (13, handle);
- }
- fputc (c, handle);
- }
-
-
/*
 * Write a NUL terminated string to the html file, one character at a
 * time through output_char.  No charset parsing is done.
 * A NULL string writes nothing.
 */
void
output_string (int allow_br, const char *s, FILE *handle)
{
  const char *cursor;

  if (s == NULL)
    return;

  for (cursor = s; *cursor != '\0'; cursor++)
    output_char (allow_br, *cursor, handle);
}
-
-
- /*
- * Outputs a char to the html file
- * Char will be charset parsed
- */
- void
- output_parse_char (int allow_br, unsigned int c, FILE *handle)
- {
- if (method == ATARICHARSET)
- c = convertAtariToIso88591[ c & 0xff ];
- output_string (allow_br, convertIso88591ToHtml[ c & 0xff ], handle);
- }
-
-
/*
 * Write a NUL terminated string to the html file with charset parsing.
 * Every character takes the same route as in output_parse_char.
 * A NULL string writes nothing.
 */
void
output_parse_string (int allow_br, const char *s, FILE *handle)
{
  const char *cursor;

  if (s == NULL)
    return;

  for (cursor = s; *cursor != '\0'; cursor++)
    output_parse_char (allow_br, (unsigned int) *cursor, handle);
}
-
- /*
- * check word for any url style in it and make a link.
- * outputs the word too.
- * Not 100% fool-proof, but shouldn't miss too many url's.
- */
- void
- output_word (int allow_br, const unsigned char *word, FILE *handle)
- {
- unsigned char *url_checker = NULL;
- unsigned char *email_checker = NULL;
-
- if ( !nolinks && (url_checker = strchr (word, ':')))
- {
- /* examples: (http://localhost/) "ftp://sunsite.auc.dk". */
- unsigned char url_host[WORD_LENGTH+1];
- int url_ok = FALSE;
- int url_index = 0;
- int pre_chars = (int)(url_checker - word);
-
- /* check for <url:http://...> type */
- /* if so, then move the url_checker pointer to the second : */
- if (pre_chars >= strlen ("url") )
- if (!strncasecmp (url_checker-(long)strlen ("url"), "url", strlen ("url") ))
- {
- unsigned char *p = url_checker;
- url_checker = strchr (url_checker+1, ':'); /* find next : */
- pre_chars = (int) (url_checker - p - 1); /* recalculate it */
- }
- /* look for a valid urltype */
- while ( url_types[url_index] && !url_ok && url_checker) /* an url_type, not found and : */
- {
- if (pre_chars == strlen (url_types[url_index]) ) /* enough (exact) chars in word before : */
- {
- if (!strncasecmp (url_checker-(long)strlen (url_types[url_index]), url_types[url_index], strlen (url_types[url_index]) ))
- {
- /* found a match */
- register unsigned char *p = NULL;
- int count = (int)(url_checker - word +1); /* chars before the url */
- strcpy (url_host, url_checker+1);
- /* now... remove any " */
- if (*url_host)
- if (p = strchr (url_host, '\"'))
- *p = '\0';
- /* remove any < - these can't be in the word, but the " removing
- * thing above could dig something nasty out of the url.
- */
- if (*url_host)
- if (p = strchr (url_host, '<'))
- *p = '\0';
- /* remove any > */
- if (*url_host)
- if (p = strchr (url_host, '>'))
- *p = '\0';
- /* remove trailing ,.;\?! */
- if (*url_host)
- {
- p = url_host + strlen (url_host) - 1;
- while ((p >= url_host) && strchr (",.;:!?\\", (int)*p)) /* protect me */
- *p-- = '\0';
- }
- /* count () and remove unbalanced ) in url */
- if (*url_host) /* still something left? */
- {
- int par_start = 0;
- int par_end = 0;
- unsigned char *p2 = NULL;
- p = url_host;
- while (p = strchr (p, '('))
- {
- par_start++;
- p++;
- }
- p = url_host;
- while (p = strchr (p, ')'))
- {
- par_end++;
- p++;
- }
- while (par_end-- > par_start) /* remove em */
- if (p2 = strrchr (url_host, ')'))
- *p2 = '\0';
- }
- /* make a small validator */
- if (*url_host)
- {
- p = url_host;
- /* skip / */
- while (*p == '/')
- p++;
- if (*p)
- {
- int trailing_chars = (strlen (url_checker+1) - strlen (url_host)) > 0 ? TRUE : FALSE;
- url_ok = TRUE; /* yup, let's make a link */
- /* a little string copy and manipulation */
-
- output_string (allow_br, "<a href=\"", handle);
- output_string (allow_br, url_types[url_index], handle);
- output_char (allow_br, ':', handle);
- output_string (allow_br, url_host, handle);
- output_string (allow_br, "\">", handle);
- if (trailing_chars)
- {
- int url_length = (int) strlen (url_host);
- /* find what are going to be hightlighted */
- strcpy (url_host, word); /* copy all - using the array */
- url_host[count + url_length] = '\0'; /* zero it */
- /* highlight stuff */
- output_parse_string (allow_br, url_host, handle);
- /* and what's left are not going to be highlighted */
- output_string (allow_br, "</a>", handle);
- /* let's output that */
- output_parse_string (allow_br, &word[count + url_length], handle);
- }
- else
- {
- output_parse_string (allow_br, word, handle);
- output_string (allow_br, "</a>", handle);
- }
- }
- }
- }
- }
- url_index++;
- }
- if (!url_ok)
- {
- /* not a valid url */
- output_parse_string (allow_br, word, handle);
- }
- }
- else if (!nomail && (email_checker = strchr (word, '@')))
- {
- /* or perhaps a email address
- * examples <tommya@post3.tele.dk> (me@here) him@there "user@anywhere"
- */
- int email_ok = FALSE;
- unsigned char email_address[WORD_LENGTH+1];
- register unsigned char *p = (unsigned char *) word;
-
- strcpy (email_address, p);
- p = email_address;
- if (*p == '@') /* start with a @ ?? */
- *p = '\0'; /* surely an illegal emailaddress */
-
- while (*p && !strchr (">\"\'\\);", (int) *p)) /* look for ">');\ that ends the emailaddress */
- p++;
- *p = '\0'; /* found the end of the emailaddress */
-
- /* remove _trailing_ dots, commas, !, ?. Still legal: user@host?subject */
- if (*email_address)
- {
- p = email_address + strlen (email_address) - 1;
- while ((p >= email_address) && strchr (",.?!:", (int)*p)) /* protect me */
- *p-- = '\0';
- }
- email_checker = strchr (email_address, '@'); /* better check again for @ */
- if (email_checker && strlen (email_checker+1) && !strchr (email_checker+1, '@')) /* a host, length and no double @ ?? */
- {
- /* okay, lets make a mailto */
- email_ok = TRUE;
- output_string (allow_br, "<a href=\"mailto:", handle);
- output_string (allow_br, email_address, handle);
- output_string (allow_br, "\">", handle);
- if (strlen (email_address) != strlen (word))
- {
- output_parse_string (allow_br, email_address, handle);
- output_string (allow_br, "</a>", handle);
- /* not highlighted stuff */
- output_parse_string (allow_br, &word[(int)strlen (email_address)], handle);
- }
- else /* no trailing things */
- {
- output_parse_string (allow_br, word, handle);
- output_string (allow_br, "</a>", handle);
- }
- }
- if (!email_ok)
- {
- /* not a valid url */
- output_parse_string (allow_br, word, handle);
- }
- }
- else /* nop, just parse the string */
- output_parse_string (allow_br, word, handle);
- }
-
-